Supplement A: Community Clustering

COMPILED

March 6, 2024

1 Introduction

Goal: identify geographic communities

Data: Village Ecodynamics Project and Southwest Social Networks databases

Method: using a somewhat novel clustering algorithm outlined below

This document is set up so that it focuses on the actual steps of the analysis, using verbose names for functions that should make their purpose clear. You can also dig into what each function does by unfolding the relevant code chunk. The critical R package for doing path analysis is cppRouting. It’s optimized for road networks, but still very fast. Someone should write a path-finding package (maybe using Rust?) optimized for grid networks…

2 R Preamble

library(cppRouting)
library(furrr)
library(ggfx)
library(ggh4x)
library(ggnewscale)
library(ggspatial)
library(gt)
library(here)
library(igraph)
library(magick)
library(patchwork)
library(sf)
library(terra)
library(tidyterra)
library(tidyverse)
# cppRouting uses as.character() on node IDs, which is fine,
# but as.character(1000000) yields "1e+06" instead of "1000000", and
# this can lead to issues when matching IDs
# to fix this, just remove the use of scientific notation
options(scipen = 9999)

# suppress progress bar and other terra output
terra::terraOptions(progress = 0, print = FALSE)

# for the sensitivity analysis, we run the entire workflow for each region in 
# its own R session, hopefully to speed things up
# (three workers, one per study region)
plan(multisession, workers = 3)

Some ggplot defaults

Code
# region color palette, keyed by region code
region_colors <- c(
  "cib" = "#D57300", 
  "cmv" = "#009E8C", 
  "nrg" = "#9B70FF"
)

# full region names, keyed by region code
region_labels <- c(
  "cmv" = "Central Mesa Verde",
  "nrg" = "Northern Rio Grande",
  "cib" = "Cibola"
)

# facet labels for plots (single-line versions)
pretty_labels <- as_labeller(c(
  "n_communities" = "No. of Communities",
  "commute" = "Mean Commute to Nearest Center [mins]",
  "room_density" = "Room Density [N/sq.km]",
  "farm_area" = "Mean Area for Farms [sq.km/N]",
  "center_area" = "Mean Area for Centers [sq.km/N]",
  "farm_ratio" = "Ratio of Farms [N] to Centers [N]",
  "area" = "Total Area [sq.km]",
  "djoin" = "D-join [mins]",
  "dmax" = "D-max [mins]",
  "p" = "P [proportion in D-join]",
  "agg_factor" = "Grid Resolution [m]"
))

# facet labels with manual line breaks, for narrow panels
squished_labels <- as_labeller(c(
  "n_communities" = "No. of\nCommunities",
  "commute" = "Mean Commute\nto Nearest Center\n[mins]",
  "room_density" = "Room Density\n[N/sq.km]",
  "farm_area" = "Mean Area\nfor Farms [sq.km/N]",
  "center_area" = "Mean Area\nfor Centers [sq.km/N]",
  "farm_ratio" = "Ratio of\nFarms [N] to Centers [N]",
  "area" = "Total Area\n[sq.km]",
  "djoin" = "D-join\n[mins]",
  "dmax" = "D-max\n[mins]",
  "p" = "P\n[prop. in D-join]",
  "agg_factor" = "Grid Resolution\n[m]"
))

theme_set(theme_bw())

# project-wide tweaks layered onto theme_bw()
theme_update(
  axis.text = element_text(size = rel(0.75), color = "gray50"),
  axis.ticks = element_line(linewidth = 0.2, color = "gray85"),
  axis.title = element_text(size = rel(1)),
  legend.text = element_text(size = rel(0.9)),
  legend.title = element_text(size = rel(0.9)),
  panel.grid = element_blank(),
  plot.title = element_text(size = rel(1), margin = margin(b = 2)),
  strip.background = element_blank(),
  strip.text = element_text(size = rel(1))
)

# trim all white space around image, 
# but then add a few white pixels back (dx, dy)
# all done with the magic of {magick}
prepare_image <- function(x, dx = 2, dy = 30, color = "white") {
  
  # Trim all whitespace around the image at path `x`, then pad it back
  # out by dx horizontal and dy vertical pixels of `color`, overwriting
  # the file in place. All done with the magic of {magick}.
  trimmed <- image_read(path = x) |> image_trim()
  
  dims <- image_info(trimmed)
  
  padded <- image_extent(
    trimmed,
    geometry_area(dims[["width"]] + dx, dims[["height"]] + dy),
    color = color
  )
  
  # write back over the original file
  image_write(padded, path = x)
  
}

3 Data

The necessary data for the analysis include a polygon defining the region of interest, the point locations of farms and centers, and a digital elevation model (DEM).

Code
build_region <- function(region){
  
  # Assemble the inputs for one study region: the region polygon, the
  # site points falling inside it, and the region's DEM.
  #
  # region: region code ("cmv", "nrg", or "cib"); used both in the SQL
  #   queries against the geopackage and in the DEM filename.
  # Returns a list of class "region" with elements region, sites, and
  # dem, plus metadata attributes name, preferred_projection, grid_crs.
  
  gpkg <- here("data", "community-centers.gpkg")
  
  # NOTE: queries are built by string interpolation; fine here because
  # `region` is an internal code, but never pass user input through this
  region_query <- sprintf("SELECT * FROM regions WHERE region = '%s'", region)
  r <- read_sf(gpkg, query = region_query)
  
  site_query <- sprintf("SELECT * FROM sites WHERE region = '%s'", region)
  s <- read_sf(gpkg, query = site_query) |> st_filter(r)
  
  dem <- here("data", paste0("dem-", region, ".tif")) |> rast()
  
  region_list <- list(
    region = r,
    sites = s,
    dem = dem
  )
  
  # save metadata
  # plain if/else rather than ifelse() for a scalar condition
  prj <- if (region == "nrg") 26913 else 26912
  
  class(region_list) <- "region"
  
  attr(region_list, "name") <- region
  attr(region_list, "preferred_projection") <- prj
  attr(region_list, "grid_crs") <- st_crs(crs(dem))$epsg
  
  region_list
  
}
cmv <- build_region("cmv")
nrg <- build_region("nrg")
cib <- build_region("cib")

Code execution time: 0.35s

Code
# per-region elevation summary statistics (meters), rendered with {gt}
list(cmv, nrg, cib) |> 
  map(\(x){
    
    # terra::global() reduces the whole raster to a single statistic
    tibble(
      region = attr(x, "name"),
      min =  global(x[["dem"]], min, na.rm = TRUE)$min,
      max =  global(x[["dem"]], max, na.rm = TRUE)$max,
      mean = global(x[["dem"]], mean, na.rm = TRUE)$mean,
      sd =   global(x[["dem"]], sd, na.rm = TRUE)$sd
    )
    
  }) |> 
  bind_rows() |> 
  gt() |> 
  tab_header("Elevation") |> 
  fmt_number(-region, decimals = 1) |> 
  opt_align_table_header("left")
Elevation
region min max mean sd
cmv 1,399.2 3,036.3 1,990.2 276.7
nrg 1,587.0 3,847.5 2,220.2 371.4
cib 1,770.9 2,777.6 2,146.0 160.1
Code
plot.region <- function(x, ...){
  
  # S3 plot method for "region" objects: a hillshaded DEM basemap with
  # farms (gray circles) and centers (orange triangles) overlaid.
  # Returns a ggplot object.
  
  # hillshade from slope/aspect, rescaled to index into a gray ramp
  slope <- terrain(x[["dem"]], "slope", unit = "radians")
  aspect <- terrain(x[["dem"]], "aspect", unit = "radians")
  
  hill <- shade(slope, aspect, 30, 45)
  hill <- setValues(hill, scales::rescale(values(hill), to = c(1, 1000)))
  hill <- round(hill)
  
  h <- list(
    shade = hill,
    grays = hcl.colors(1000, "Grays")[values(hill)]
  )
  
  ggplot() +
    geom_spatraster(
      data = h[["shade"]], 
      fill = h[["grays"]],
      maxcell = Inf
    ) +
    geom_spatraster(data = x[["dem"]], alpha = 0.5) +
    scale_fill_distiller(palette = "Greys", guide = "none") +
    ggnewscale::new_scale_fill() +
    scale_fill_hypso_c(
      name = "Elevation (m)",
      palette = "utah_1",
      limits = c(1350, 3850),
      alpha = 0.5,
      na.value = "transparent"
    ) +
    # farms: small gray points with a white glow for legibility
    with_outer_glow(
      geom_sf(
        data = x[["sites"]] |> filter(center == 0),
        shape = 16,
        color = "gray25",
        alpha = 0.5,
        size = 0.8
      ),
      colour = "white",
      sigma = 1,
      expand = 1
    ) +
    # centers: orange triangles with a white glow
    with_outer_glow(
      geom_sf(
        data = x[["sites"]] |> filter(center == 1),
        shape = 17,
        color = "#CD3C00",
        alpha = 0.9,
        size = 1
      ),
      colour = "white",
      sigma = 1,
      expand = 1
    ) +
    # FIX: location/line_col/text_col are parameters of
    # annotation_scale(), not aesthetics; wrapping them in aes() meant
    # they were never applied to the scale bar
    annotation_scale(
      location = "bl",
      line_col = "white",
      text_col = "white",
      height = unit(0.18, "cm"),
      line_width = 0.5
    ) +
    coord_sf(expand = FALSE) +
    theme(
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank(),
      legend.position = "none",
      plot.margin = margin(l = 2)
    )
  
}

# compose the three region maps side-by-side with {patchwork}
gg <- (plot(cmv) + ggtitle("Central Mesa Verde")) +
  (plot(nrg) + ggtitle("Northern Rio Grande")) + 
  (plot(cib) + ggtitle("Cibola"))

fn <- here("figures", "overview-maps.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 7, 
  height = 4.5,
  dpi = 600,
  bg = "white"
)

# trim and pad the saved png in place
prepare_image(fn)

# clean up: drop the plot method and figure intermediates
remove(plot.region, gg, fn)
An overview map showing the distribution of farms and community centers across the three study areas.
Figure 1: An overview map showing the distribution of farms and community centers across the three study areas.

4 Graph

It’s important to note that throughout we track movement between grid cells, not site points. So, all site points that fall into the same grid cell are assumed to be the same location, and if any of those sites are centers, then the whole place is considered a center.

4.1 Convert grid to graph

Before converting the grid to a graph, we aggregate grid cells. Aggregating reduces the resolution of the grid and by extension the number of grid cells the raster contains. Since the grid cells are nodes in the grid graph, aggregating simplifies the graph as well. This speeds up the path analysis considerably, but it does come at the cost of precision in terms of path analysis across the landscape, though that is not as bad as it sounds. The idea that we could build a model that is perfectly isomorphic with the actual or true path taken by people a thousand years ago is wildly implausible. The idea of the ‘true path’ is also problematic as we’re not evaluating a single commute, but rather the path people should take more often than not. So, the real question is what level of precision is sufficient for evaluating that. And the answer is, of course, it just depends.

Are you walking blindfolded along the edge of a deep, precipitous cliff? Then centimeters are probably important to you - really, really important to you. But if you’re just trying to get a reasonable estimate of the agricultural catchment of an entirely pedestrian population based on the distribution of farms around urban centers, probably the difference between ~30 m and ~90 m grid cells isn’t that big of a deal. If that doesn’t convince you, though, we do some limited sensitivity analysis down below.

Code
aggregate_grid <- function(x, .factor){
  
  # Coarsen the region's DEM by an integer factor, taking the mean of
  # the merged cells (ignoring NA). A factor of 1 leaves the raster
  # untouched; the factor is recorded as metadata either way.
  attr(x, "agg_fact") <- .factor
  
  if (.factor <= 1) return(x)
  
  x[["dem"]] <- terra::aggregate(
    x[["dem"]],
    fact = .factor,
    fun = "mean",
    na.rm = TRUE
  )
  
  x
  
}

group_sites_by_cell <- function(x){
  
  # Attach the DEM grid-cell ID to every site point, then collapse the
  # site attributes to one row per cell: a cell counts as a center if
  # any site in it is a center, and its room count is the sum over all
  # sites in the cell.
  site_cells <- cells(x[["dem"]], vect(x[["sites"]]))[, 2]
  
  x[["sites"]] <- x[["sites"]] |> mutate(cell = site_cells)
  
  x[["cell_table"]] <- x[["sites"]] |> 
    st_drop_geometry() |> 
    select(cell, center, n_room) |> 
    group_by(cell) |> 
    summarize(
      center = ifelse(sum(center) > 0, 1, 0),
      n_room = sum(n_room)
    )
  
  x
  
}

add_graph <- function(x, max_slope = NULL){
  
  # Convert the (possibly aggregated) DEM into a directed cppRouting
  # graph: nodes are grid cells, edge weights are pedestrian travel
  # times in seconds from Campbell et al.'s hiking function. Edges
  # steeper than max_slope (degrees) are removed when it is supplied.
  
  # keep track of metadata
  attr(x, "max_slope") <- max_slope
  
  # cells(<SpatRaster>) returns cell IDs 
  # for all cells with non-NA values
  non_NA_cells <- cells(x[["dem"]])
  
  # cells(<SpatRaster>, <numeric>) returns cell IDs 
  # for all cells with values matching <numeric>
  NA_cells <- cells(x[["dem"]], NA_real_)[[1]]
  
  # get adjacency matrix 
  # pairs = TRUE gives one row per ordered (from, to) pair, so both
  # directions of every edge are present in the graph
  graph <- adjacent(
    x[["dem"]],
    cells = non_NA_cells,
    directions = 8,
    pairs = TRUE
  ) |> as.data.frame()
  
  # remove rows with to-cells that have NA values
  graph <- graph |> filter(!to %in% NA_cells)
    
  # rise = difference between adjacent cells
  # run = distance between adjacent cells
  # slope = rise/run
  # convert slope 
  #   to radians with rads = atan(rise/run)
  #   to degrees with degs = rads * 180/pi 
  # NOTE(review): rise is computed as from - to, so an uphill edge
  # (to higher than from) gets a NEGATIVE slope, which Campbell's curve
  # treats as downhill — confirm this sign convention is intended
  graph <- graph |> 
    mutate(
      from_elevation = terra::extract(x[["dem"]], from)[,1],
      to_elevation = terra::extract(x[["dem"]], to)[,1],
      rise = from_elevation - to_elevation,
      run = distance(
        xyFromCell(x[["dem"]], from),
        xyFromCell(x[["dem"]], to),
        lonlat = is.lonlat(x[["dem"]]),
        pairwise = TRUE
      ),
      slope = atan(rise/run) * (180/pi)
    )
  
  # what is a realistic slope people would try to hike?
  # remove all edges with slopes greater than max_slope
  if (!is.null(max_slope)) { graph <- graph |> filter(slope <= max_slope) }
  
  # now that we have slope, the strategy is this:
  # 1) calculate the hiking speed on each slope estimate
  # 2) calculate the length of each slope (the length of the hypotenuse)
  # 3) divide length by speed to get travel time (this is the edge weight)

  # Campbell's hiking function, see Campbell et al 2019
  # note: campbell function wants degrees!
  campbell <- function(x) {
    
    # values provided in Supplement Table S3
    # using the 5th percentile of the Lorentz curve as recommended by Campbell
    a <- -1.527
    b <- 14.041
    c <- 36.813
    d <- 0.320
    e <- -0.00273
    
    # lorentz distribution
    lorentz <- (1 / ((pi * b) * (1 + ((x - a) / b)^2)))
    
    # modified lorentz
    (c * lorentz) + d + (e * x)
    
  }

  # calculate 
  #   speed using Campbell's hiking function
  #   length of hypotenuse
  #   travel time between adjacent cells
  graph <- graph |> 
    mutate(
      speed = campbell(slope),
      cost = sqrt(run^2L + rise^2L)/speed
    )

  # get coordinates table with cell IDs for safe joins
  coords <- data.frame(ID = non_NA_cells) |> 
    bind_cols(xyFromCell(x[["dem"]], non_NA_cells)) |> 
    rename_with(toupper)

  x[["graph"]] <- makegraph(
    graph |> select(from, to, cost),
    directed = TRUE,
    coords = coords
  )

  x
  
}
# same pipeline per region: coarsen the grid 3x, group sites by grid
# cell, then build the travel-time graph (edges steeper than 45 degrees
# are removed)
cmv <- cmv |> 
  aggregate_grid(.factor = 3) |> 
  group_sites_by_cell() |> 
  add_graph(max_slope = 45)

nrg <- nrg |> 
  aggregate_grid(.factor = 3) |> 
  group_sites_by_cell() |> 
  add_graph(max_slope = 45)

cib <- cib |> 
  aggregate_grid(.factor = 3) |> 
  group_sites_by_cell() |> 
  add_graph(max_slope = 45)

Code execution time: 75.47s (~1.26 minutes)

4.2 Add distance matrix

To calculate travel time between every pair of site points, we use Dijkstra’s algorithm as implemented in the R package {cppRouting}. Note that we also average the from- and to- travel times, so the results are isotropic (meaning direction is irrelevant).

Code
add_distance_matrix <- function(x){
  
  # Compute the full travel-time matrix (seconds) between all site
  # cells with Dijkstra's algorithm, then force it to be symmetric by
  # averaging the from/to directions, making the result isotropic.
  
  cell_ids <- x[["cell_table"]][["cell"]]
  
  # NOTE(review): assigned but never used in this function
  cells_in_graph <- x[["graph"]][["dict"]][["ref"]]
  
  dm <- get_distance_matrix(
    x[["graph"]],
    from = cell_ids,  
    to = cell_ids
  )
  
  # make symmetric by averaging off-diagonals
  # na.rm = TRUE means if only one direction is defined, use it as-is
  lt <- lower.tri(dm)
  ut <- upper.tri(dm)
  
  dm[lt] <- rowMeans(cbind(dm[lt], t(dm)[lt]), na.rm = TRUE)
  dm[ut] <- t(dm)[ut]
  
  # a cell's travel time to itself is identically zero
  diag(dm) <- 0
  
  # add to list and return
  x[["dm"]] <- dm
  
  x
  
}
cmv <- cmv |> add_distance_matrix()
nrg <- nrg |> add_distance_matrix()
cib <- cib |> add_distance_matrix()

Code execution time: 145.54s (~2.43 minutes)

Code
randomize_commutes <- function(x, n = 1000){
  
  # Compare observed commute times (farm -> nearest center) against
  # commute times from regularly sampled random points, returning both
  # as kernel density estimates on a shared grid of 512 points over
  # [0, 7200] seconds.
  #
  # x: a "region" list with dm, cell_table, sites, dem, and graph
  # n: target number of random sample points
  # Returns a tibble with columns region, distance, observed, expected.
  
  # subset dm (distance matrix) to get dm[farms, centers]
  farms <- x[["cell_table"]] |> filter(center == 0)
  centers <- x[["cell_table"]] |> filter(center == 1)
  
  i <- which(rownames(x[["dm"]]) %in% farms[["cell"]])
  j <- which(colnames(x[["dm"]]) %in% centers[["cell"]])
  
  dm <- x[["dm"]][i, j]
  
  # get observed density of commutes to the nearest center
  observed <- apply(dm, 1, min)
  observed <- density(
    observed,
    kernel = "gaussian", 
    n = 512, 
    from = 0, 
    to = 7200
  )
  
  # build raster to randomly sample cells
  r <- setValues(x[["dem"]], NA)
  
  # don't sample past furthest observed distance from nearest center
  # (straight-line distance, used only to bound the sampling area)
  dmax <- st_distance(
    x[["sites"]] |> filter(center == 0), 
    x[["sites"]] |> filter(center == 1)
  )
  
  dmax <- units::drop_units(dmax)
  dmax <- apply(dmax, 1, min)
  dmax <- max(dmax)
  
  sample_area <- x[["sites"]] |> 
    filter(center == 1) |> 
    st_buffer(dist = dmax) |> 
    st_union() |> 
    vect()
  
  i <- cells(x[["dem"]], sample_area)[, "cell"]
  
  # only use those that have nodes in the graph (excludes big slopes)
  i <- i[i %in% x[["graph"]][["dict"]][["ref"]]]
  
  r[i] <- 1
  
  # don't sample cells with centers in them
  j <- unique(centers[["cell"]])
  
  r[j] <- NA
  
  random_points <- spatSample(
    r, 
    size = n,
    method = "regular",
    cells = TRUE,
    na.rm = TRUE
  )
  
  random_points <- unique(random_points[["cell"]])
  
  # travel times from the random points to every center
  dm <- get_distance_matrix(
    x[["graph"]],
    from = random_points,  
    to = colnames(dm)
  )
  
  # FIX: the original symmetrized this matrix (lower.tri/upper.tri
  # averaging plus diag(dm) <- 0) exactly as for the square
  # site-by-site matrix. But this matrix is random-points x centers and
  # generally NOT square, so the lower.tri/upper.tri indexing scrambled
  # values and diag(dm) <- 0 zeroed the commute from random point i to
  # center i. A from/to average is undefined for this rectangular
  # matrix, so we use the directed travel times as computed.
  
  expected <- apply(dm, 1, min)
  expected <- density(
    expected,
    kernel = "gaussian", 
    n = 512, 
    from = 0, 
    to = 7200
  )
  
  tibble::tibble(
    region = attr(x, "name"),
    distance = observed[["x"]],
    observed = observed[["y"]],
    expected = expected[["y"]]
  )
  
}

# fix the RNG seed so the random sampling is reproducible
set.seed(1701) # USS Enterprise

# observed vs. random commute densities for all three regions
commutes <- list(cmv, nrg, cib) |> 
  map(randomize_commutes) |> 
  bind_rows()

q <- function(x, p){
  
  # Quantile(s) of the observed farm-to-nearest-center commute times.
  # NB: this masks base::q() for the remainder of the session (it is
  # removed again after use, below).
  #
  # x: a "region" list with dm and cell_table
  # p: probability (or vector of probabilities) passed to quantile()
  # Returns a tibble with columns region and distance.
  
  # subset dm (distance matrix) to get dm[farms, centers]
  farms <- x[["cell_table"]] |> filter(center == 0)
  centers <- x[["cell_table"]] |> filter(center == 1)
  
  i <- which(rownames(x[["dm"]]) %in% farms[["cell"]])
  j <- which(colnames(x[["dm"]]) %in% centers[["cell"]])
  
  dm <- x[["dm"]][i, j]
  
  observed <- apply(dm, 1, min)
  
  # FIX: spell out the argument name — quantile() has no `p` argument,
  # so `p = p` only worked via partial matching against `probs`
  quant <- quantile(observed, probs = p)
  quant <- unname(quant)
  
  tibble::tibble(
    region = attr(x, "name"),
    distance = quant
  )
  
}

# observed 95% quantile of commute time per region, plus hand-picked
# plotting coordinates (y) and display labels for the annotations
q95 <- list(cmv, nrg, cib) |> 
  map(\(x){ q(x, 0.95) }) |> 
  bind_rows() |> 
  mutate(
    y = c(2.4, 1.8, 1.2),
    label = case_when(
      region == "cib" ~ "Cibola",
      region == "cmv" ~ "Central Mesa Verde",
      region == "nrg" ~ "Northern Rio Grande",
      TRUE ~ NA
    )
  )

# log ratio of observed to expected (random) commute-time densities;
# values above zero mean farms sit closer to centers than chance
gg <- ggplot() +
  geom_hline(
    yintercept = 0,
    color = "gray75",
    linewidth = 0.2
  ) +
  geom_line(
    data = commutes, 
    aes(distance/60, log(observed/expected), color = region), 
    linewidth = 0.9,
    lineend = "round"
  ) +
  # markers and labels at each region's observed 95% quantile
  geom_point(
    data = q95,
    aes(distance/60, y, color = region),
    size = 2
  ) +
  geom_text(
    data = q95,
    aes(distance/60, y, color = region, label = label),
    vjust = 0.5,
    hjust = 0,
    nudge_x = 2,
    size = 11.5/.pt
  ) +
  scale_color_manual(name = NULL, values = region_colors) +
  # legend-like key explaining the quantile markers
  annotate(
    "point",
    x = min(q95[["distance"]])/60,
    y = 3.0,
    color = "gray25",
    size = 2
  ) +
  annotate(
    "text",
    label = "Observed 95% Quantile",
    x = min(q95[["distance"]])/60 + 2,
    y = 3.0,
    color = "gray25",
    vjust = 0.5,
    hjust = 0,
    size = 11.5/.pt
  ) +
  labs(
    x = "Commute to Nearest Center [mins]",
    y = "Probability Density\nlog(Observed) - log(Random)"
  ) +
  scale_x_continuous(
    breaks = seq(0, 120, by = 30), 
    expand = expansion(0.03)
  ) +
  coord_cartesian(xlim = c(0, 120)) +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_line(linewidth = 0.2, color = "gray85")
  )

fn <- here("figures", "commute-kl.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 5.1,
  height = 3.3,
  dpi = 600
)

prepare_image(fn)

# clean up intermediates (and unmask base::q)
remove(randomize_commutes, commutes, q, q95, gg)
Difference between log probability density of observed commute times from farms to nearest centers and log probability density of commute times from random locations to nearest centers.
Figure 2: Difference between log probability density of observed commute times from farms to nearest centers and log probability density of commute times from random locations to nearest centers.

5 Communities

Our community detection algorithm proceeds as follows:

  1. Identify community centers
  2. Join farms to their nearest center
  3. Remove distant farms
  4. Join centers to their overlapping neighbors
  5. Draw smallest concave hull encompassing all farms, centers, and paths

Step 1 is already done and stored in the data. Step 2 is technically done after step 4. It just makes more sense to think of it coming first, like we’re building up to the community - simply put, small things connect to big things, and big things merge into even bigger things.

5.1 Filter out distant farms

Code
remove_distant_farms <- function(x, dmax){
  
  # Drop farms whose travel time to their nearest center exceeds dmax
  # (seconds). Centers are always retained. Records dmax and the number
  # of dropped farms as metadata attributes.
  
  # subset dm (distance matrix) to get dm[farms, centers]
  farms <- x[["cell_table"]] |> filter(center == 0)
  centers <- x[["cell_table"]] |> filter(center == 1)
  
  i <- which(rownames(x[["dm"]]) %in% farms[["cell"]])
  j <- which(colnames(x[["dm"]]) %in% centers[["cell"]])
  
  dm <- x[["dm"]][i, j]
  
  # if the distance from a farm to its nearest center
  # is greater than dmax, we remove it from the cell table
  min_distance <- apply(dm, 1, min)
  
  # drop = FALSE keeps dm a matrix (with rownames) even if only one
  # farm survives the filter; without it a single row collapses to a
  # bare vector and rownames()/nrow() below would return NULL
  dm <- dm[min_distance <= dmax, , drop = FALSE]
  
  x[["cell_table"]] <- x[["cell_table"]] |> 
    filter(cell %in% c(rownames(dm), colnames(dm)))
  
  # collect metadata
  attr(x, "dmax") <- dmax
  attr(x, "n_outliers") <- nrow(farms) - nrow(dm)
  
  x
  
}
# how many seconds in ... ?
one_hour <- 3600

cmv <- cmv |> remove_distant_farms(dmax = one_hour)
nrg <- nrg |> remove_distant_farms(dmax = one_hour)
cib <- cib |> remove_distant_farms(dmax = one_hour)

Code execution time: 0.08s

Code
# per-region count of farms dropped for being over dmax from every center
list(cmv, nrg, cib) |> 
  map(\(x){
    
    tibble(
      region = attr(x, "name"),
      n_outliers = attr(x, "n_outliers")
    )
    
  }) |> 
  bind_rows() |> 
  gt() |> 
  tab_header("No. of Dropped Farms") |> 
  opt_align_table_header("left") |> 
  tab_options(column_labels.hidden = TRUE)

5.2 Join neighboring centers

For any two centers \(C_1\) and \(C_2\) with catchment room counts \(N_1 < N_2\), if \(pN_1\) of \(C_1\)’s rooms are within a distance \(D\) of \(C_2\), then \(C_1\) is part of the same community as \(C_2\).

Here the catchment room count, \(N\), of focal center \(c\) is defined as

\[N_{c} = \sum_{i=1}^{S} R_{i} \cdot I(t_{ic} \leq D)\]

for all sites \(S\) (including both farms and centers), with \(R_{i}\) being the room count at site \(i\), \(t_{ic}\) the travel time from \(i\) to \(c\), \(D\) the threshold travel time to a center (referred to as D-join in the paper), and \(I\) the indicator function that is 1 if \(t \leq D\) and 0 otherwise. Note that \(S\) includes \(c\) and that \(t_{cc} = 0\) (the travel time from a center to the same center is precisely 0), so \(R_{c}\) is always included in \(N_{c}\).

The catchment room count within \(D\) of two centers \(c\) and \(d\) is then

\[N_{cd} = \sum_{i=1}^{S} R_{i} \cdot I(t_{ic} \leq D) \cdot I(t_{id} \leq D)\]

and

\[p_{cd} = N_{cd}/N_{c}\]

so we could also state our rule as: if \(p_{cd} \geq \alpha\) for some critical threshold \(\alpha\), then \(c\) is part of the same community as \(d\). In this case, the default threshold is 0.8 relative to a commute time distance of one half hour.

Code
join_neighboring_centers <- function(x, p, djoin){
  
  # Merge centers into communities: center c joins center d when at
  # least proportion p of the rooms within djoin (seconds) of c are
  # also within djoin of d. Joins chain transitively via connected
  # components, so a community is a component of the adjacency graph.
  #
  # x: a "region" list with dm and cell_table
  # p: critical threshold on the shared-room proportion (alpha)
  # djoin: travel-time threshold in seconds (D-join)
  
  # subset dm (distance matrix) to get dm[farms and centers, centers]
  i <- x[["cell_table"]] |> pull(cell)
  j <- x[["cell_table"]] |> filter(center == 1) |> pull(cell)
  
  i <- which(rownames(x[["dm"]]) %in% i)
  j <- which(colnames(x[["dm"]]) %in% j)
  
  dm <- x[["dm"]][i, j]
  
  # room count in each farm/center
  Rk <- x[["cell_table"]] |> pull(n_room)
  names(Rk) <- x[["cell_table"]] |> pull(cell)
  
  # want to make sure these are arranged in the same order as rows in dm
  Rk <- Rk[rownames(dm)]
  
  # find farms and centers that are within distance djoin of other centers
  # this creates a binary/logical matrix of 1s and 0s,
  # which is what you get from the indicator function:
  # 1 if row is in distance djoin of column, 0 otherwise
  B <- matrix(as.numeric(dm <= djoin), nrow = nrow(dm))
  
  dimnames(B) <- dimnames(dm)
  
  # square community centers matrix (centers x centers)
  G <- matrix(NA, nrow = ncol(dm), ncol = ncol(dm))
  
  colnames(G) <- colnames(dm)
  rownames(G) <- colnames(dm)
  
  # get total number of rooms within djoin of each center
  # this multiplies each column of B (which is just a bunch of 0s and 1s)
  # by the room count vector Rk, then takes the sum of the columns. 
  # we put this into the diagonal because it answers the question:
  # how many rooms does a center share with itself?
  # this is Nc. or Ncc since d = c.
  diag(G) <- colSums(B * Rk)
  
  # get shared room count (Ncd) within djoin of all pairs of centers
  # note we dump this into the lower triangle! this is a real R gotcha
  # as the vector is passed into the matrix by column, not row, so you 
  # can't do this with the upper triangle!
  # (combn() emits pairs (1,2), (1,3), ..., in exactly the column-major
  # order the lower triangle is filled in)
  lt <- lower.tri(G)
  
  G[lt] <- combn(
    ncol(B), 
    m = 2, 
    FUN = \(.x){ sum(B[, .x[1]] * B[, .x[2]] * Rk) }
  )
  
  # this matrix should be symmetric: 
  # the diagonal is Ncc, the room count a center shares with itself
  # the off-diagonals are Ncd = Ndc, the room counts shared between different centers
  ut <- upper.tri(G)
  
  G[ut] <- t(G)[ut]
  
  # create adjacency matrix
  # by dividing each column of G by the diagonal of G, we get the proportion
  # of the shared room count to the total room count, so p or pcd
  # we then compare this to the critical threshold, to determine adjacency
  neighbors_matrix <- (G / diag(G)) >= p

  # now to separate them into communities
  # unfortunately, there's a recursive dimension to this:
  # if a center is joined with a center that's joined with a center, etc.
  # i don't know how to do that in R, so igraph it is...
  g <- igraph::graph_from_adjacency_matrix(neighbors_matrix)
  membership <- igraph::components(g)[["membership"]]
  
  # this gives us the community that every center is part of
  x[["center_communities"]] <- tibble(
    center = names(membership) |> as.numeric(),
    community = unname(membership)
  )
  
  # collect metadata
  attr(x, "djoin") <- djoin
  attr(x, "n_communities") <- length(unique(unname(membership)))
  attr(x, "proportion") <- p
  
  x
  
}
# how many seconds in ... ?
half_hour <- 1800

cmv <- cmv |> join_neighboring_centers(p = 0.8, djoin = half_hour)
nrg <- nrg |> join_neighboring_centers(p = 0.8, djoin = half_hour)
cib <- cib |> join_neighboring_centers(p = 0.8, djoin = half_hour)

Code execution time: 4.3s

Code
# per-region count of communities found by the joining step
list(cmv, nrg, cib) |> 
  map(\(x){
    
    tibble(
      region = attr(x, "name"),
      n_communities = attr(x, "n_communities")
    )
    
  }) |> 
  bind_rows() |> 
  gt() |> 
  tab_header("No. of Communities") |> 
  opt_align_table_header("left") |> 
  tab_options(column_labels.hidden = TRUE)
No. of Communities
cmv 104
nrg 90
cib 45

5.3 Collect members

Here we just associate each farm with its nearest community center. So, we have full community membership defined. Then we drop communities that have three or fewer sites associated with them. This is only done because our main goal with this analysis is to estimate the extent of farm land associated with these dispersed farming communities.

Code
collect_members <- function(x){
  
  # Assign every farm to the community of its nearest (by travel time)
  # center, giving a complete community membership table in cell_table.
  
  # subset dm (distance matrix) to get dm[farms, centers]
  farms <- x[["cell_table"]] |> filter(center == 0)
  centers <- x[["cell_table"]] |> filter(center == 1)
  
  i <- which(rownames(x[["dm"]]) %in% farms[["cell"]])
  j <- which(colnames(x[["dm"]]) %in% centers[["cell"]])
  
  dm <- x[["dm"]][i, j]
  
  # get nearest center to each farm
  # (which.min over each row of the farms x centers matrix)
  k <- apply(dm, 1, which.min)
  nearest_centers <- tibble(
    farm = as.numeric(names(k)),
    nearest_center = as.numeric(colnames(dm)[k])
  )
  
  # collect the farms and the centers they are closest to, 
  # and by extension the communities they are part of
  farms <- x[["cell_table"]] |> 
    filter(center == 0) |> 
    left_join(
      nearest_centers, 
      by = join_by(cell == farm)
    ) |> 
    left_join(
      x[["center_communities"]], 
      by = join_by(nearest_center == center)
    ) |> 
    select(-nearest_center)
  
  # collect the centers and their community affiliations
  centers <- x[["cell_table"]] |> 
    filter(center == 1) |> 
    left_join(
      x[["center_communities"]], 
      by = join_by(cell == center)
    )
  
  # combine and save
  x[["cell_table"]] <- bind_rows(farms, centers)
      
  x
  
}

drop_small_communities <- function(x, threshold){
  
  # Drop every community whose farm count is at or below `threshold`.
  # Only farms (center == 0) count toward community size; the centers
  # themselves are not part of the tally.
  keep <- x[["cell_table"]] |> 
    filter(center == 0) |> 
    count(community) |> 
    filter(n > threshold) |> 
    pull(community) |> 
    unique()
  
  x[["cell_table"]] <- x[["cell_table"]] |> filter(community %in% keep)
  
  # record the size threshold and how many communities fell below it
  attr(x, "min_size") <- threshold
  attr(x, "n_dropped") <- attr(x, "n_communities") - length(keep)
  
  x
  
}
cmv <- cmv |> 
  collect_members() |> 
  drop_small_communities(threshold = 3)

nrg <- nrg |> 
  collect_members() |> 
  drop_small_communities(threshold = 3)

cib <- cib |> 
  collect_members() |> 
  drop_small_communities(threshold = 3)

Code execution time: 0.14s

Code
# per-region count of communities dropped for being too small
list(cmv, nrg, cib) |> 
  map(\(x){
    
    tibble(
      region = attr(x, "name"),
      n_dropped = attr(x, "n_dropped")
    )
    
  }) |> 
  bind_rows() |> 
  gt() |> 
  tab_header("No. of Dropped Communities") |> 
  opt_align_table_header("left") |> 
  tab_options(column_labels.hidden = TRUE)
No. of Dropped Communities
cmv 4
nrg 35
cib 23

5.4 Define boundaries

Now that we have community centers joined to their overlapping neighbors, and farms joined to their nearest centers, we can define boundaries for the resulting communities using a concave hull. We choose a concave rather than convex hull because the latter tends to artificially inflate the size of the community, often to a significant degree. On the other hand, the concave hull can generate extremely unrealistic shapes for communities, even when moving the concavity ratio closer to the convex hull.

Our somewhat brute force strategy for navigating between the Scylla and Charybdis of being too big or too strange is to first compute the least-cost path between every site on the concave hull of the community’s site points. In this case the concave hull gives us all the points on the convex hull along with some on the interior of the convex hull. The points that are excluded should have shortest paths to the outer points that at some point intersect with the shortest paths that are actually calculated, so we get a reasonable approximation and save some time doing it. We then add the vertices that define the computed paths as virtual points to the community and generate the concave hull for the expanded set of community points. The consequence is that an individual setting off from one farm in the community to any other site in the community (farm or center) should never have to leave the community. This, in effect, makes the community polygons concave with respect to time.

The result can still be a little noisy, so we simplify polygons - including, in some cases, filling holes - by dilating them, that is, adding then removing a small buffer.

Note that we also do some graph pruning in each iteration, basically cropping the graph to an expanded area around the points assigned to a specific community. This prevents the search algorithm from searching out away from the other sites in the community. The distance used for pruning is only meaningful for pedestrian commute times.

Code
define_boundaries <- function(x, concavity_ratio, dilate){
  # Draw a boundary polygon for each community by tracing all shortest
  # paths between the community's edge cells, then wrapping the visited
  # cells (plus the member sites) in a concave hull.
  #
  # x               region object: a list with elements "cell_table",
  #                 "sites", "dem", and "graph", carrying attributes
  #                 "grid_crs", "preferred_projection", and "name"
  # concavity_ratio passed to st_concave_hull(); lower = tighter hull
  # dilate          length-2 numeric: buffer distances applied in sequence
  #                 (e.g. c(130, -100) dilates then erodes, filling small
  #                 holes and smoothing edges)
  #
  # Returns x with an sf "communities" element added (EPSG:4326) and the
  # "concavity" and "dilate" attributes recorded.
  
  members <- x[["cell_table"]]
  
  sites <- x[["sites"]] |> 
    select(cell) |> 
    st_transform(attr(x, "grid_crs"))
  
  community_ids <- unique(members[["community"]])
  
  communities <- vector("list", length(community_ids))
  
  # index the result list by position, not by community id, so there are
  # no NULL gaps when community ids are non-contiguous
  for (k in seq_along(community_ids)){
    
    i <- community_ids[k]
    
    m <- members |> filter(community == i)
    
    # get cells along the edge of point clusters using concave hull
    # so we get points that are on the convex hull and in
    # the interior of the cluster, too
    pts <- xyFromCell(x[["dem"]], m[["cell"]]) |> 
      st_multipoint() |> 
      st_sfc(crs = attr(x, "grid_crs")) |> 
      st_sf(geometry = _)
    
    cells <- pts |> 
      st_cast("POINT") |> 
      mutate(cell = m[["cell"]]) |> 
      st_filter(st_concave_hull(pts, ratio = 0.2), .predicate = st_touches) |> 
      pull(cell)
    
    # prune the graph so search doesn't go too far away from the convex hull,
    # meaning away from all the other points in the community
    # we use a somewhat arbitrary 1.5 km distance for this, so we can reasonably 
    # assume we are not chopping off the shortest path, while at the same time
    # cutting down the total number of paths to search by a considerable amount
    bb8 <- st_convex_hull(pts) |> 
      st_buffer(1500) |> 
      st_bbox() |> 
      st_as_sfc(crs = st_crs(pts))
    
    cells_in_bb8 <- cells(x[["dem"]], vect(bb8))[,2]
    
    graph <- x[["graph"]]
    
    node_ids <- graph[["dict"]] |> 
      filter(ref %in% cells_in_bb8) |> 
      pull(id)
    
    graph[["data"]] <- graph[["data"]] |> 
      filter(from %in% node_ids, to %in% node_ids)
    
    # get all short paths 
    # this returns a list with length equal to the length of cells
    # each element of the list is a vector of the grid cells that the short
    # path passes through
    paths <- get_multi_paths(graph, from = cells, to = cells)
    
    # unraveling this list gives us a vector of all the cells on shortest paths
    paths <- paths |> unlist(recursive = TRUE) |> as.numeric() |> unique()
    
    site_xy <- sites |> 
      filter(cell %in% m[["cell"]]) |> 
      st_coordinates()
    
    # st_coordinates() returns a matrix with colnames "X"/"Y"; lowercase
    # them to match the "x"/"y" colnames from xyFromCell()
    # (fix: the original used names(), which does not set matrix colnames)
    colnames(site_xy) <- tolower(colnames(site_xy))
    
    xy <- rbind(
      xyFromCell(x[["dem"]], paths),
      site_xy
    )
    
    border <- xy |> 
      st_multipoint() |> 
      st_sfc(crs = attr(x, "grid_crs")) |> 
      st_sf(geometry = _) |> 
      st_concave_hull(ratio = concavity_ratio) |> 
      mutate(community = i)
    
    communities[[k]] <- border
    
  }

  # buffer in the projected CRS (buffer distances are in its map units),
  # then store the polygons in geographic coordinates
  x[["communities"]] <- communities |> 
    bind_rows() |> 
    st_transform(attr(x, "preferred_projection")) |> 
    st_buffer(dilate[1]) |> 
    st_buffer(dilate[2]) |> 
    st_transform(4326)

  # update metadata
  attr(x, "concavity") <- concavity_ratio
  attr(x, "dilate") <- dilate
  
  x
  
}
# trace community boundaries for each region with a shared set of
# smoothing parameters (dilate, then erode)
cmv <- define_boundaries(cmv, concavity_ratio = 0.1, dilate = c(130, -100))
nrg <- define_boundaries(nrg, concavity_ratio = 0.1, dilate = c(130, -100))
cib <- define_boundaries(cib, concavity_ratio = 0.1, dilate = c(130, -100))

Code execution time: 70.79s (~1.18 minutes)

6 Results

Here we show the community polygons themselves as well as the log-density of rooms in each. Note the very pronounced edge effect along Mesa Verde National Park’s administrative boundary.

Code
visualize_communities <- function(x){
  # Map one region: community polygons over a hillshaded DEM, filled by
  # a 9-class binning of log room density. Returns a ggplot object.
  
  # build a hillshade and pre-render it as gray values, so it can sit
  # beneath a second fill scale later in the plot
  dem_slope <- terrain(x[["dem"]], "slope", unit = "radians")
  dem_aspect <- terrain(x[["dem"]], "aspect", unit = "radians")
  
  hillshade <- shade(dem_slope, dem_aspect, 30, 45)
  hillshade <- setValues(
    hillshade,
    scales::rescale(values(hillshade), to = c(1, 1000))
  )
  hillshade <- round(hillshade)
  
  # one gray per cell, looked up from a 1000-step ramp
  gray_ramp <- hcl.colors(1000, "Grays")[values(hillshade)]
  
  # total rooms per community
  room_counts <- x[["cell_table"]] |> 
    group_by(community) |> 
    summarize(n_room = sum(n_room))
  
  # log room density, binned into 9 classes for the fill scale
  communities <- x[["communities"]] |> 
    left_join(room_counts, by = "community") |> 
    mutate(
      area = st_area(geometry) |> units::set_units(km2) |> units::drop_units(),
      density = n_room/area,
      density = log(density+1),
      density = cut(density, 9, labels = FALSE)
    )
  
  ggplot() +
    ggtitle(region_labels[attr(x, "name")]) +
    geom_spatraster(
      data = hillshade, 
      fill = gray_ramp,
      maxcell = Inf
    ) +
    geom_spatraster(data = x[["dem"]], alpha = 0.5) +
    scale_fill_distiller(palette = "Greys", guide = "none") +
    ggnewscale::new_scale_fill() +
    # white halo under the community outlines
    geom_sf(
      data = communities,
      fill = "transparent",
      color = "white",
      alpha = 0.1,
      linewidth = 0.5
    ) +
    geom_sf(
      data = communities,
      aes(fill = density, color = density),
      linewidth = 0.1
    ) + 
    scale_color_gradientn(
      name = "Log Density", 
      colors = RColorBrewer::brewer.pal(9, "YlOrBr"),
      breaks = c(1, 5, 9),
      labels = c("Low", "Medium", "High"),
      guide = guide_legend()
    ) +
    scale_fill_gradientn(
      name = "Log Density", 
      colors = RColorBrewer::brewer.pal(9, "YlOrBr") |> alpha(0.6),
      breaks = c(1, 5, 9),
      labels = c("Low", "Medium", "High"),
      guide = guide_legend()
    ) +
    annotation_scale(
      aes(location = "bl", line_col = "white", text_col = "white"),
      height = unit(0.18, "cm"),
      line_width = 0.5
    ) +
    coord_sf(expand = FALSE) +
    theme(
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank(),
      legend.key = element_rect(color = "gray30", fill = "transparent"),
      plot.margin = margin(l = 2)
    )

}

# assemble the three regional maps into one figure
zg <- patchwork::wrap_plots(
  map(list(cmv, nrg, cib), visualize_communities),
  ncol = 3
)

# patchwork is great, but not perfect...
bob <- theme(
  legend.box.margin = margin(t = -10),
  legend.key.size = unit(0.43, "cm"),
  legend.margin = margin(),
  legend.justification = "left",
  legend.position = "bottom"
)

# had to fiddle with this to get the sizes consistent with the overview maps
fn <- here("figures", "communities-all.png")

ggsave(
  plot = zg + plot_layout(guides = "collect") & bob,
  filename = fn,
  width = 7, 
  height = 4.5,
  dpi = 600,
  bg = "white"
)

prepare_image(fn)

# clean up intermediates
remove(visualize_communities, zg, bob, fn)
Map shows defined community boundaries with fill colors representing the log density of rooms in each.
Figure 3: Map shows community boundaries with fill colors representing the log density of rooms in each.

6.1 Community Distributions

Code
get_community_data <- function(x){
  # Build a one-row-per-community summary table for a region: polygon
  # area, room/farm/center counts, the mean commute from farms to their
  # nearest center, and room density.
  #
  # x  region object with elements "communities" (sf polygons with a
  #    community column), "cell_table" (one row per occupied grid cell,
  #    with community, n_room, and a 0/1 center flag), and "dm" (a
  #    travel-cost matrix whose dimnames are grid-cell ids);
  #    attr(x, "name") identifies the region
  #
  # Returns a tibble with columns region, community, area, n_room,
  # n_farm, n_center, commute, room_density.
  
  # AREA (km2)
  A <- x[["communities"]] |> 
    st_area() |> 
    units::set_units(km2) |> 
    units::drop_units()
  
  tbl_area <- tibble(
    community = x[["communities"]][["community"]],
    area = A
  )
  
  # ROOMS
  tbl_rooms <- x[["cell_table"]] |> 
    group_by(community) |> 
    summarize(n_room = sum(n_room))
  
  # FARMS (cells with center == 0)
  tbl_farms <- x[["cell_table"]] |> 
    filter(center == 0) |> 
    group_by(community) |> 
    summarize(n_farm = n())
  
  # CENTERS (cells with center == 1)
  tbl_centers <- x[["cell_table"]] |> 
    filter(center == 1) |> 
    group_by(community) |> 
    summarize(n_center = n())
  
  # COMMUTES
  # subset dm (distance matrix) to get dm[farms, centers]
  farms <- x[["cell_table"]] |> filter(center == 0)
  centers <- x[["cell_table"]] |> filter(center == 1)
  
  i <- which(rownames(x[["dm"]]) %in% farms[["cell"]])
  j <- which(colnames(x[["dm"]]) %in% centers[["cell"]])
  
  # drop = FALSE keeps dm a matrix even when only one farm or one center
  # matches; otherwise `[` collapses it to a bare vector and the
  # rownames()/apply() calls below fail
  dm <- x[["dm"]][i, j, drop = FALSE]
  
  # for each farm, the cost of reaching its nearest center
  min_dist <- tibble(
    farm = as.numeric(rownames(dm)),
    distance = apply(dm, 1, min)
  )
  
  tbl_commutes <- x[["cell_table"]] |> 
    filter(center == 0) |> 
    left_join(min_dist, by = join_by(cell == farm)) |> 
    group_by(community) |> 
    summarize(commute = mean(distance, na.rm = TRUE))

  # GATHER EVERYTHING INTO ONE TABLE
  tbl_area |> 
    left_join(tbl_rooms, by = "community") |> 
    left_join(tbl_farms, by = "community") |>
    left_join(tbl_centers, by = "community") |> 
    left_join(tbl_commutes, by = "community") |> 
    mutate(
      region = attr(x, "name"),
      room_density = n_room/area
    ) |> 
    relocate(region)
  
}
# one summary table across all three regions
results <- bind_rows(
  get_community_data(cmv),
  get_community_data(nrg),
  get_community_data(cib)
)
Code
# Display table: median (η), mean (µ), and sd (σ) of each community-level
# variable, by region, rendered with {gt}
results |> 
  # commute is stored in seconds (dmax/djoin are built as 60 * minutes);
  # convert to minutes for display
  mutate(commute = commute/60) |> 
  group_by(region) |> 
  # columns like area_η, area_µ, area_σ, n_room_η, ...
  summarize(across(area:room_density, list("η" = median, "µ" = mean, "σ" = sd))) |> 
  # strip the n_/room_ prefixes so names split cleanly on "_" below
  rename_with(str_remove, pattern = "n_|room_") |> 
  # "area_η" -> variable = "area", statistic = "η"
  pivot_longer(
    !region,
    names_to = c("variable", "statistic"),
    names_sep = "_"
  ) |> 
  pivot_wider(
    names_from = "variable",
    values_from = "value"
  ) |> 
  # format each value as "statistic: value", then stack the three
  # statistics in a single cell separated by <br> (fmt_markdown renders it)
  mutate(across(area:density, \(x) paste0(statistic, ": ", round(x,2)))) |> 
  group_by(region) |> 
  summarize(across(area:density, \(x) paste0(x, collapse = "<br>"))) |> 
  rename(
    " " = region,
    "Area (km2)" = area,
    "Rooms (N)" = room,
    "Farms (N)" = farm,
    "Centers (N)" = center,
    "Commute (mins)" = commute,
    "Room Density (N/km2)" = density
  ) |> 
  # reorder the rows to cmv, nrg, cib
  slice(c(2, 3, 1)) |> 
  gt() |> 
  tab_header(
    title = "Community summaries",
    subtitle = md("η = median, µ = mean, σ = standard deviation")
  ) |> 
  fmt_markdown(columns = everything()) |> 
  cols_width(
    " " ~ pct(8),
    "Area (km2)" ~ pct(13),
    "Rooms (N)" ~ pct(13),
    "Farms (N)" ~ pct(13),
    "Centers (N)" ~ pct(13),
    "Commute (mins)" ~ pct(18),
    "Room Density (N/km2)" ~ pct(22)
  ) |> 
  opt_align_table_header("left")
Community summaries
η = median, µ = mean, σ = standard deviation
Area (km2) Rooms (N) Farms (N) Centers (N) Commute (mins) Room Density (N/km2)

cmv

η: 7.1
µ: 8.68
σ: 6.73

η: 402
µ: 647.76
σ: 701.36

η: 24.5
µ: 44.38
σ: 55.86

η: 1
µ: 1.65
σ: 1.37

η: 22.35
µ: 23.28
σ: 8.83

η: 57.49
µ: 99.69
σ: 97.36

nrg

η: 5.53
µ: 6.06
σ: 4.56

η: 479
µ: 593.49
σ: 444.7

η: 20
µ: 36.73
σ: 34.96

η: 2
µ: 2.49
σ: 2.06

η: 19.12
µ: 19.04
σ: 8.77

η: 99.43
µ: 219.77
σ: 450.86

cib

η: 4.37
µ: 7.19
σ: 7.39

η: 492
µ: 766.68
σ: 626.09

η: 13.5
µ: 24.14
σ: 25.34

η: 2
µ: 2
σ: 1.11

η: 21.16
µ: 24.74
σ: 12.27

η: 170.25
µ: 238.76
σ: 270.64

Code
# Figure 4: boxplots of key community variables by region

# label positions: anchor each region's name at the 75th percentile of
# area-per-center, placed in the "center_area" facet
lbls <- results |> 
  group_by(region) |> 
  # spell out probs = (was the partially-matched `p =`)
  summarize(x = quantile(area/n_center, probs = 0.75)) |> 
  mutate(
    variable = "center_area",
    y = c(1.25, 3.25, 2.25),
    label = case_when(
      region == "cib" ~ "Cibola",
      region == "cmv" ~ "Central Mesa Verde",
      region == "nrg" ~ "Northern Rio Grande",
      TRUE ~ region
    )
  )

# drop the most extreme room densities (top 5%) so the main
# distribution stays visible
q95 <- quantile(results[["room_density"]], 0.95)

gg <- results |> 
  mutate(
    farm_area = area/n_farm,
    center_area = area/n_center,
    commute = commute/60,  # seconds -> minutes
    farm_ratio = n_farm/n_center,
    room_density = ifelse(room_density > q95, NA, room_density)
  ) |> 
  select(-n_room, -n_farm, -n_center) |> 
  pivot_longer(c(-region, -community), names_to = "variable") |> 
  ggplot() + 
  geom_boxplot(aes(
    x = value, 
    y = fct_relevel(region, "cib", "nrg"), 
    fill = region
  )) +
  geom_text(
    data = lbls,
    aes(x, y, label = label, color = region),
    size = 11.5/.pt,
    nudge_x = 1,
    hjust = 0
  ) +
  scale_color_manual(values = region_colors) +
  scale_fill_manual(values = alpha(region_colors, 0.6)) +
  labs(
    x = NULL,
    y = NULL
  ) +
  facet_wrap(
    vars(variable), 
    ncol = 2, 
    nrow = 3,
    scales = "free_x",  # was the partially-matched `scale =`
    strip.position = "bottom",
    labeller = pretty_labels
  ) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "none",
    panel.grid.major.x = element_line(linewidth = 0.2, color = "gray85"),
    strip.placement = "outside",
    strip.text = element_text(size = rel(1), margin = margin(t = -1, b = 10))
  )

fn <- here("figures", "results-boxplot.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 6.5,
  height = 7.0,
  dpi = 600
)

prepare_image(fn)

# fix: was remove(lbls, lblr, gg, fn) -- "lblr" is never defined here
# (removing it warns), and q95 was left behind; remove what this chunk
# actually created
remove(lbls, q95, gg, fn)
Distributions of important variables across communities and regions.
Figure 4: Distributions of important variables across communities and regions. Some of the extreme room density estimates were dropped to make the main distribution more visible.

6.2 Save results

gpkg <- here("data", "community-centers.gpkg")

# site-to-community membership table (attributes only, geometry dropped)
list(cmv, nrg, cib) |> 
  map(\(r) st_join(r[["sites"]], r[["communities"]])) |> 
  bind_rows() |> 
  st_drop_geometry() |> 
  write_sf(gpkg, layer = "members")

# community polygons joined to their summary statistics
list("cmv" = cmv, "nrg" = nrg, "cib" = cib) |> 
  imap(\(r, nm) mutate(r[["communities"]], region = nm)) |> 
  bind_rows() |> 
  left_join(results, by = c("region", "community")) |> 
  write_sf(gpkg, layer = "communities")

7 Sensitivity Analysis

This analysis is computationally expensive, but it's worth checking. Note that we don't run every combination of parameters, but we do get decent variety relative to the default values chosen above. Most of the simulations take about 4 minutes to run on my machine. The exceptions are those involving high-resolution graphs (constructed from DEMs with a low aggregation factor). Those take considerably longer, as much as an hour, even with all the shortcuts I added. There are 19 total simulations. All together, they take about 2.2 hours to run.

For reference, here are the specs on my machine:

  • CPU: AMD Ryzen 7 5800U with Radeon Graphics
    • Base speed: 1.90 GHz
    • Cores: 8
    • Logical processors: 16
  • RAM: 16.0 GB (15.4 GB usable)
  • System type: 64-bit operating system, x64-based processor
  • OS: Windows 11 Home
Code
run_simulation <- function(.x, .args){
  # Run the full clustering workflow for one region under a given
  # parameter set and return its community summary table.
  #
  # .x    region identifier, e.g. "cmv"
  # .args named list with agg_factor, dmax, p, and djoin
  
  region <- build_region(.x)
  region <- aggregate_grid(region, .factor = .args[["agg_factor"]])
  region <- group_sites_by_cell(region)
  region <- add_graph(region, max_slope = 45)
  region <- add_distance_matrix(region)
  region <- remove_distant_farms(region, dmax = .args[["dmax"]])
  region <- join_neighboring_centers(
    region,
    p = .args[["p"]],
    djoin = .args[["djoin"]]
  )
  region <- collect_members(region)
  region <- drop_small_communities(region, threshold = 3)
  region <- define_boundaries(
    region,
    concavity_ratio = 0.1,
    dilate = c(130, -100)
  )
  
  get_community_data(region)

}

sensitivity <- function(x, args){
  # Run run_simulation() for every region in x under one parameter set
  # and row-bind the per-region summary tables.
  #
  # x    character vector of region identifiers
  # args named list with agg_factor, dmax, p, and djoin

  if (args[["agg_factor"]] == 1){
    
    # the full-resolution grid is too heavy for this machine to run in
    # parallel, so fall back to sequential execution
    dfs <- map(x, run_simulation, .args = args)
    
  } else {
    
    # there's no randomization in this code, but furrr belches out warnings
    # anyway, so i add the furrr option to set seed.
    dfs <- future_map(x, \(.x, .args = args){ 
      
      # each worker is a fresh R session, so re-apply the session options
      options(scipen = 9999)
      terra::terraOptions(progress = 0, print = FALSE)
      
      run_simulation(.x, .args)
      
    }, .options = furrr_options(seed = 1701))
    
  }
  
  bind_rows(dfs)
  
}
# Parameter grid for the sensitivity analysis: each tibble varies one
# focal parameter while holding the others at their defaults
# (agg_factor = 3, dmax = one_hour, p = 0.8, djoin = half_hour)
sensitivity_parameters <- bind_rows(
  # grid aggregation factor (controls DEM and graph resolution)
  tibble(
    focal_variable = "agg_factor",
    agg_factor = 1:4,
    dmax = one_hour,
    p = 0.8,
    djoin = half_hour
  ),
  # maximum farm-to-center commute: 40-80 minutes, stored in seconds
  tibble(
    focal_variable = "dmax",
    agg_factor = 3,
    dmax = 60 * seq(40, 80, by = 10),
    p = 0.8,
    djoin = half_hour
  ),
  # p threshold passed to join_neighboring_centers()
  tibble(
    focal_variable = "p",
    agg_factor = 3,
    dmax = one_hour,
    p = seq(0.5, 0.9, by = 0.1),
    djoin = half_hour
  ),
  # center-joining distance: 20-40 minutes, stored in seconds
  tibble(
    focal_variable = "djoin",
    agg_factor = 3,
    dmax = one_hour,
    p = 0.8,
    djoin = 60 * seq(20, 40, by = 5)
  )
)

# run every simulation: pmap() walks the rows of the parameter grid and
# stores each run's summary table in a list column
sensitivity_results <- sensitivity_parameters |> 
  mutate(
    analysis = pmap(
      list(agg_factor, dmax, p, djoin),
      \(agg_factor, dmax, p, djoin){
        sensitivity(
          c("cmv", "nrg", "cib"), 
          args = list(
            "agg_factor" = agg_factor, 
            "dmax" = dmax, 
            "p" = p, 
            "djoin" = djoin
          )
        )
      }
    )
  )

Code execution time: 8143.62s (~2.26 hours)

Code
# Reshape the sensitivity results for plotting: one row per parameter
# set x region x outcome variable
graph_data <- sensitivity_results |> 
  mutate(analysis = map(analysis, \(.x){
    
    # collapse each simulation's community table to per-region summaries
    .x |>
      group_by(region) |>
      summarize(
        n_communities = n(),
        area = median(area),
        commute = median(commute/60),  # seconds -> minutes
        room_density = median(room_density)
      )
    
  })) |> 
  unnest(analysis) |> 
  pivot_longer(
    c(n_communities, area, room_density, commute),
    names_to = "outcome_variable",
    values_to = "outcome_value"
  ) |> 
  mutate(
    # x-axis value = the value of whichever parameter was varied
    focal_value = case_when(
      focal_variable == "dmax" ~ dmax,
      focal_variable == "p" ~ p,
      focal_variable == "djoin" ~ djoin,
      focal_variable == "agg_factor" ~ agg_factor,
      TRUE ~ NA
    ),
    # relabel aggregation factors 1-4 with the corresponding grid
    # resolution (presumably meters per cell -- TODO confirm against the
    # aggregation step earlier in the workflow)
    focal_value = case_when(
      focal_variable == "agg_factor" & focal_value == 1 ~ 27,
      focal_variable == "agg_factor" & focal_value == 2 ~ 55,
      focal_variable == "agg_factor" & focal_value == 3 ~ 82,
      focal_variable == "agg_factor" & focal_value == 4 ~ 109,
      TRUE ~ focal_value
    ),
    region = fct_relevel(region, "cmv", "nrg")
  )

# axis break positions for each focal variable
agg_factor <- graph_data |> 
  filter(focal_variable == "agg_factor") |> 
  pull(focal_value) |> 
  unique() |> 
  sort()
dmax <- 60 * seq(40, 80, by = 10)
djoin <- 60 * seq(20, 40, by = 5)

# helper: fixed breaks with optional relabeling (e.g. seconds -> minutes)
x_scale <- function(x, .labels = x){
  scale_x_continuous(breaks = x, labels = .labels)
}

# per-facet x scales, applied below via ggh4x::facetted_pos_scales()
x_scales <- list(
  focal_variable == "agg_factor" ~ x_scale(agg_factor),
  focal_variable == "djoin" ~ x_scale(djoin, .labels = djoin/60),
  focal_variable == "dmax" ~ x_scale(dmax, .labels = dmax/60),
  focal_variable == "p" ~ x_scale(seq(0.5, 0.9, by = 0.1))
)

# outcome-by-parameter grid of line plots, one line per region
gg <- ggplot(graph_data, aes(focal_value, outcome_value, color = region)) + 
  geom_line() +
  geom_point(size = 1.2) +
  scale_color_manual(
    name = NULL, 
    values = region_colors,
    labels = region_labels
  ) +
  facet_grid(
    rows = vars(outcome_variable),
    cols = vars(focal_variable),
    switch = "both",
    labeller = squished_labels,  # labeller defined earlier in the document
    scales = "free"
  ) +
  facetted_pos_scales(x = x_scales) +
  theme(
    axis.title = element_blank(),
    legend.box.margin = margin(b = -7),
    legend.direction = "vertical",
    legend.justification = "left",
    legend.key.height = unit(10, "pt"),
    legend.margin = margin(),
    legend.position = "top",
    legend.spacing.y = unit(5, "pt"),
    strip.placement = "outside",
    strip.text = element_text(size = rel(1), margin = margin(t = -1, b = 10))
  ) +
  guides(color = guide_legend(byrow = TRUE))

fn <- here("figures", "sensitivity-analysis.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 7.1,
  height = 7.3,
  dpi = 600
)

prepare_image(fn)

# clean up intermediates
remove(graph_data, agg_factor, dmax, djoin, x_scale, x_scales, gg, fn)

Figure shows results of sensitivity analysis.

Figure shows results of sensitivity analysis, focusing on three main parameters (djoin, dmax, and p) and the level of aggregation of the grid, which corresponds to its resolution. Measures of some key outcomes are also shown, including the number of communities, their total area, room density, and average commute time from farms to the nearest community center.

7.1 Save Results

# flatten the list column and save the full sensitivity results
write_csv(
  unnest(sensitivity_results, analysis),
  file = here("data", "sensitivity-analysis.csv")
)

8 Session Info

Code
# capture version info for the attached packages, for reproducibility
pkg_sesh <- sessioninfo::session_info(pkgs = "attached")

# session_info() doesn't report quarto, so inject its version and path
# into the platform section by hand
pkg_sesh[["platform"]][["quarto"]] <- paste(
  quarto::quarto_version(), 
  "@", 
  quarto::quarto_path()
)

# print it out
pkg_sesh
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.3.1 (2023-06-16 ucrt)
 os       Windows 11 x64 (build 22631)
 system   x86_64, mingw32
 ui       RTerm
 language (EN)
 collate  English_United States.utf8
 ctype    English_United States.utf8
 tz       America/Denver
 date     2024-03-06
 pandoc   3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
 quarto   1.4.545 @ C:\\PROGRA~1\\Quarto\\bin\\quarto.exe

─ Packages ───────────────────────────────────────────────────────────────────
 package    * version date (UTC) lib source
 cppRouting * 3.1     2022-12-01 [1] CRAN (R 4.3.1)
 dplyr      * 1.1.4   2023-11-17 [1] CRAN (R 4.3.1)
 forcats    * 1.0.0   2023-01-29 [1] CRAN (R 4.3.1)
 furrr      * 0.3.1   2022-08-15 [1] CRAN (R 4.3.1)
 future     * 1.33.1  2023-12-22 [1] CRAN (R 4.3.2)
 ggfx       * 1.0.1   2022-08-22 [1] CRAN (R 4.3.1)
 ggh4x      * 0.2.8   2024-01-23 [1] CRAN (R 4.3.2)
 ggnewscale * 0.4.10  2024-02-08 [1] CRAN (R 4.3.2)
 ggplot2    * 3.4.4   2023-10-12 [1] CRAN (R 4.3.2)
 ggspatial  * 1.1.9   2023-08-17 [1] CRAN (R 4.3.1)
 gt         * 0.10.1  2024-01-17 [1] CRAN (R 4.3.2)
 here       * 1.0.1   2020-12-13 [1] CRAN (R 4.3.1)
 igraph     * 2.0.1.1 2024-01-30 [1] CRAN (R 4.3.2)
 lubridate  * 1.9.3   2023-09-27 [1] CRAN (R 4.3.2)
 magick     * 2.8.2   2023-12-20 [1] CRAN (R 4.3.2)
 patchwork  * 1.2.0   2024-01-08 [1] CRAN (R 4.3.2)
 purrr      * 1.0.2   2023-08-10 [1] CRAN (R 4.3.1)
 readr      * 2.1.5   2024-01-10 [1] CRAN (R 4.3.2)
 sf         * 1.0-15  2023-12-18 [1] CRAN (R 4.3.2)
 stringr    * 1.5.1   2023-11-14 [1] CRAN (R 4.3.1)
 terra      * 1.7-71  2024-01-31 [1] CRAN (R 4.3.2)
 tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.1)
 tidyr      * 1.3.1   2024-01-24 [1] CRAN (R 4.3.2)
 tidyterra  * 0.5.2   2024-01-19 [1] CRAN (R 4.3.1)
 tidyverse  * 2.0.0   2023-02-22 [1] CRAN (R 4.3.1)

 [1] C:/Users/kenne/AppData/Local/R/win-library/4.3
 [2] C:/Program Files/R/R-4.3.1/library

──────────────────────────────────────────────────────────────────────────────